#!/usr/bin/env bash
# Launch a vLLM server for the GPTQ INT4 checkpoint stored at
# /data/deepseek-r1-distill-qwen-32b-gptq-int4, exposed to clients under the
# model name "deepseek-qwen32-gptq" on port 8000.

# Raise vLLM's timeouts so long-running requests are not aborted.
# Per the vLLM environment-variable docs, the first value is in seconds and
# the second in milliseconds, so both amount to 10 hours.
export VLLM_ENGINE_ITERATION_TIMEOUT_S=36000
export VLLM_RPC_TIMEOUT=36000000

# Context window: 128Ki tokens (131072). POSIX $(( )) is used instead of the
# deprecated bash-only $[ ] form, which other shells pass through literally.
max_model_len=$((128 * 1024))

# NOTE(review): --host 0.0.0.0 listens on every network interface and
# --trust-remote-code lets code shipped with the checkpoint run locally;
# confirm both are intended for this deployment.
vllm serve /data/deepseek-r1-distill-qwen-32b-gptq-int4 \
  --served-model-name "deepseek-qwen32-gptq" \
  --tensor-parallel-size 2 \
  --max-model-len "${max_model_len}" \
  --quantization gptq \
  --disable-log-stats \
  --trust-remote-code \
  --host 0.0.0.0 \
  --port 8000
